//! @file prog1.cc
//! @brief This program builds a simple simulator of multicore systems. Its purpose is to
//! demonstrate how to build such a simulator using the Manifold framework.
//! The multicore system is illustrated as follows:
//!
//! @verbatim
//!      -----------       -----------     ----------------     -----------
//!     | processor |     | processor |   | mem controller |   | processor |
//!      -----------       -----------     ----------------     -----------
//!           |                 |                  |                 |
//!        -------           -------               |              -------
//!       | cache |         | cache |              |             | cache |    
//!        -------           -------               |              -------
//!           |                 |                  |                 |
//!           |                 |                  |                 |
//!  ---------------------------------------------------------------------------
//!     ------------      ------------       ------------      ------------ 
//!    | NetIntface |    | NetIntface |     | NetIntface |    | NetIntface |  
//!     ------------      ------------       ------------      ------------ 
//!  ---------------------------------------------------------------------------
//! @endverbatim
//!
//! The components used in this program are very simple ones:
//! - MockProc: a simple processor model that sends memory requests to the cache
//!             and upon receiving a response sends another one.
//! - SimpleMC: a simple memory controller model. Upon receiving a request, it sends
//!           back a response after certain delay. In this program, there is only
//!           one MC in the multicore system.
//! - simple-cache: a simple write-through cache with no write-alloc.
//! - simple-net: a simple mesh network with no flow control.
//!
//! In this program the configuration for the components is hard-coded except the
//! dimensions of the mesh network which must be provided at the command-line.
//! 
//! To run this program, type:
//! @code
//! mpirun -np <NP> prog1 <X> <Y>
//! @endcode
//! where <NP> is the number of MPI tasks, which must be 1, 2, or X*Y+1,
//! and <X> and <Y> are the dimensions of the mesh network.
//!
//! Partitioning of the components is also hard-coded based on the number of LPs:
//!     - 1 LP: everything is on LP 0.
//!     - 2 LPs: the network is on LP 0 and everything else is on LP 1.
//!     - X*Y+1 LPs: the network is on LP 0, and each node (processor, cache and adapter or
//!                mem controller and adapter) is on a separate LP.
//!
//! With simple-net, the IDs of the network interface are 0 to N-1 where N is X*Y.
//! In this program the IDs of the nodes are also 0 to N-1, so the mapping from node ID
//! to network interface ID is straightforward.
//!
#include "kernel/clock.h"
#include "kernel/component.h"
#include "kernel/manifold.h"
#include "simple-net/interfaceCreator.h"
#include "simple-net/network.h"
#include "simple-net/networkInterface.h"
#include "simple-cache/simple_cache.h"
#include "simple-mc/simple_mc.h"
#include "mockProc.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <iostream>
#include <set>
#include <vector>
#include "mpi.h"

using namespace std;
using namespace manifold::kernel;
using namespace manifold::simple_cache;
using namespace manifold::simple_net;
using namespace manifold::simple_mc;


// Kinds of node that can sit at a position of the mesh.
enum {
    CORE_NODE = 0,
    MC_NODE = 1
};

// Component IDs belonging to one node of the mesh. The same record is used
// for both kinds of node; main() only fills in the members that match the
// node's type (proc_cid and cache_cid for a core node, mc_cid for an MC node).
struct Node_cid
{
    int type;           // CORE_NODE or MC_NODE
    CompId_t proc_cid;  // component ID of the node's processor
    CompId_t cache_cid; // component ID of the node's cache
    CompId_t mc_cid;    // component ID of the node's memory controller
};


int main(int argc, char** argv)
{
    if(argc != 3) {
        cerr << "Usage: mpirun -np <NP> " << argv[0] << " <X> <Y>" << endl;
	cerr << "  X and Y are the dimension of the mesh; NP the number of MPI tasks, which must be 1, 2, or X*Y+1." << endl;
	exit(1);
    }

    const int X_DIMENSION = atoi(argv[1]);
    const int Y_DIMENSION = atoi(argv[2]);
    assert(X_DIMENSION > 0 && Y_DIMENSION > 0);
    assert(X_DIMENSION * Y_DIMENSION > 1);

    const int NUM_NODES = X_DIMENSION * Y_DIMENSION;

    Manifold::Init(argc, argv);

    int N_LPs = 1; //number of LPs
    MPI_Comm_size(MPI_COMM_WORLD, &N_LPs);
    cout << "Number of LPs = " << N_LPs << endl;
    if(N_LPs != 1 && N_LPs !=2 && N_LPs != NUM_NODES + 1) {
        cerr << "Number of LPs must be 1, 2, or " << NUM_NODES + 1 << "!" << endl;
	exit(1);
    }

    //==========================================================================
    // Configuration parameters
    //==========================================================================

    // No configuration for Processor

    //cache parameters
    simple_cache_settings cache_settings;
    cache_settings.name = "testCache";
    cache_settings.type = CACHE_DATA;
    cache_settings.size = 0x1 << 14; //16K
    cache_settings.assoc = 4;
    cache_settings.block_size = 32;
    cache_settings.hit_time = 2;
    cache_settings.lookup_time = 13;
    cache_settings.replacement_policy = RP_LRU;

    //network parameters
    // x and y dimensions already specified
    const int NETWORK_CREDITS = 4; //input buffer size for the network (both interfaces and routers)

    //memory controller configuration
    int MC_NODE_IDX = 0; //the node index of MC; could be any value between 0 and NUM_NODES-1
    assert(MC_NODE_IDX >=0 && MC_NODE_IDX < NUM_NODES);
    set<int> mc_node_idx_set;
    mc_node_idx_set.insert(MC_NODE_IDX);
    int MC_LATENCY = 53;


    //==========================================================================
    // create all the components.
    //==========================================================================

    Clock myClock(1000);

    // need storage for component IDs for connecting components

    Node_cid node_cids[NUM_NODES];

    //Terminal address to network address mapping
    //Using Simple_terminal_to_net_mapping means node ids must be 0 to NUM_NODES-1.
    Simple_terminal_to_net_mapping* mapping = new Simple_terminal_to_net_mapping();

    GenNI_flow_control_setting fc_setting;
    fc_setting.credits = NETWORK_CREDITS; //network interface's input buffer for router input

    InterfaceCreator* ifcreator = new GenInterfaceCreator<GenNetworkInterface<mem_req> >(fc_setting);
    MeshNetwork* myNetwork = new MeshNetwork(X_DIMENSION, Y_DIMENSION, NETWORK_CREDITS, 0, myClock, ifcreator, mapping); //network creates all the network interfaces.
    delete ifcreator;



#define REDIRECT_COUT

#ifdef REDIRECT_COUT
    // create a file into which to write debug/stats info.
    int Mytid;
    MPI_Comm_rank(MPI_COMM_WORLD, &Mytid);
    char buf[10];
    sprintf(buf, "DBG_LOG%d", Mytid);
    ofstream DBG_LOG(buf);

    //redirect cout to file.
    std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
    std::cout.rdbuf(DBG_LOG.rdbuf()); // redirect cout
#endif

    SimpleMcMap1* mc_map = new SimpleMcMap1(MC_NODE_IDX);

    if(N_LPs <= 2) {
        //Network is always on LP 0. If total 2 LPs, the rest would be on LP 1.
	LpId_t node_lp = (N_LPs == 1) ? 0 : 1;

	//Node ID is the same as its node index: between 0 and NUM_NODES-1
	for(int i=0; i<NUM_NODES; i++) {
	    if(mc_node_idx_set.find(i) != mc_node_idx_set.end()) {
		node_cids[i].type = MC_NODE;
		node_cids[i].mc_cid = Component :: Create<SimpleMC>(node_lp, i, MC_LATENCY);
	    }
	    else {
		node_cids[i].type = CORE_NODE;
		node_cids[i].proc_cid = Component :: Create<MockProc>(node_lp, i);
		node_cids[i].cache_cid = Component :: Create<simple_cache>(node_lp, i, cache_settings, mc_map);
	    }
	}
    }
    else {
        //More than 2 LPs: Network on LP 0; each node is on a different LP.
	for(int i=0; i<NUM_NODES; i++) {
	    if(mc_node_idx_set.find(i) != mc_node_idx_set.end()) {
		node_cids[i].type = MC_NODE;
		node_cids[i].mc_cid = Component :: Create<SimpleMC>(i+1, i, MC_LATENCY);
	    }
	    else {
		node_cids[i].type = CORE_NODE;
		node_cids[i].proc_cid = Component :: Create<MockProc>(i+1, i);
		node_cids[i].cache_cid = Component :: Create<simple_cache>(i+1, i, cache_settings, mc_map);
	    }
	}
    }

    for(int i=0; i<NUM_NODES; i++) {
    if(node_cids[i].type == CORE_NODE)
    cout << i << "  core node" << endl;
    else
    cout << i << "  mc node" << endl;
    }

    std::vector<CompId_t>& ni_cids = myNetwork->get_interfaceCids();



    //==========================================================================
    //Now connect the components
    //==========================================================================
    std::vector<NetworkInterfaceBase*>& nis = myNetwork->get_interfaces();

    for(int i=0; i<NUM_NODES; i++) {
        if(node_cids[i].type == CORE_NODE) {
	    //some sanity check
	    MockProc* proc = Component :: GetComponent<MockProc>(node_cids[i].proc_cid);
	    if(proc != 0) {
		simple_cache* cache = Component :: GetComponent<simple_cache>(node_cids[i].cache_cid);
		assert(proc->get_nid() == cache->get_node_id());

		if(nis[i] != 0) { //only true when there is only 1 LP
		    assert(cache->get_node_id() == nis[i]->get_id());
		}
	    }
	

	    //proc to cache
	    Manifold :: Connect(node_cids[i].proc_cid, MockProc::OUT,
				node_cids[i].cache_cid, simple_cache::IN_FROM_UPPER,
				&simple_cache::handle_request, 1);
	    //cache to proc
	    Manifold :: Connect(node_cids[i].cache_cid, simple_cache::OUT_TO_UPPER,
				node_cids[i].proc_cid, MockProc::IN,
				&MockProc::handle_incoming, 1);
	    //cache to interface
	    Manifold :: Connect(node_cids[i].cache_cid, simple_cache::OUT_TO_LOWER,
				ni_cids[i], GenNetworkInterface<mem_req>::IN_FROM_TERMINAL,
				&GenNetworkInterface<mem_req>::handle_terminal, 1);
	    //interface to cache
	    Manifold :: Connect(ni_cids[i], GenNetworkInterface<mem_req>::OUT_TO_TERMINAL,
				node_cids[i].cache_cid, simple_cache::IN_FROM_LOWER,
				&simple_cache::handle_response, 1);

	    //Inside the network, routing is based on the interface IDs. The adapters have
	    //their own IDs. When one adapter (or its client to be more specific) sends
	    //data to another adapter (client), it uses adapter IDs. But it needs to know
	    //the destination's network interface ID so the network can deliver the packet.
	    //Therefore, the adapter layer should maintain a mapping between adapter IDs
	    //and network interface IDs.
	    //This is similar to the mapping between IP addresses and MAC addresses, except
	    //here the mapping is static and nothing like ARP is involved.

	    //In this simple network, we simplify things further such that the network interface
	    //IDs are the same as the adapter IDs, therefore the mapping becomes M(X)=X. So
	    //we don't need to keep any mapping. Sending to adapter X would be sending to
	    //network interface X.
	}
	else {//MC node
	    SimpleMC* mc = Component :: GetComponent<SimpleMC>(node_cids[i].mc_cid);
	    if(mc != 0 ) {
		if(nis[i] != 0) { //only true when both are in the same LP
		    assert(mc->get_nid() == nis[i]->get_id());
		}
	    }
	    //mc to interface
	    Manifold :: Connect(node_cids[i].mc_cid, SimpleMC::OUT,
				ni_cids[i], GenNetworkInterface<mem_req>::IN_FROM_TERMINAL,
				&GenNetworkInterface<mem_req>::handle_terminal, 1);
	    //interface to mc
	    Manifold :: Connect(ni_cids[i], GenNetworkInterface<mem_req>::OUT_TO_TERMINAL,
				node_cids[i].mc_cid, SimpleMC::IN,
				&SimpleMC::handle_incoming, 1);
	}
    }


    //==========================================================================
    // Register processors with the clock
    //==========================================================================

    for(int i=0; i<NUM_NODES; i++) {
        if(node_cids[i].type == CORE_NODE) {
	    MockProc* proc = Component :: GetComponent<MockProc>(node_cids[i].proc_cid);
	    if(proc) {
		Clock::Register(proc, &MockProc :: send_creq, (void(MockProc::*)(void))0);
	    }
	}
    }

    Manifold::StopAt(1200);
    Manifold::Run();

    for(int i=0; i<NUM_NODES; i++) {
        if(node_cids[i].type == CORE_NODE) {
	    MockProc* proc = Component :: GetComponent<MockProc>(node_cids[i].proc_cid);
	    if(proc)
		proc->print_stats(cout);
	}
	else {
	    SimpleMC* mc = Component :: GetComponent<SimpleMC>(node_cids[i].mc_cid);
	    if(mc)
		mc->print_stats(cout);
	}
    }

    myNetwork->print_stats(cout);
    Manifold :: print_stats(cout);


#ifdef REDIRECT_COUT
    std::cout.rdbuf(cout_sbuf);
#endif

    Manifold :: Finalize();

}
